home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PC World Komputer 2007 December
/
PCWKCD1207B.iso
/
Blogowanie poza sfera
/
Flock 1.0 beta
/
flock-1.0RC3.en-US.win32.exe
/
flock
/
components
/
flockIndexer.js
< prev
next >
Wrap
Text File
|
2007-10-18
|
22KB
|
678 lines
// BEGIN FLOCK GPL
//
// Copyright Flock Inc. 2005-2007
// http://flock.com
//
// This file may be used under the terms of the
// GNU General Public License Version 2 or later (the "GPL"),
// http://www.gnu.org/licenses/gpl.html
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
// for the specific language governing rights and limitations under the
// License.
//
// END FLOCK GPL
// XPCOM identity of this component; used by Module for registration and
// exposed again through the nsIClassInfo properties on the prototype.
const CLASS_ID = Components.ID("{C2BFF231-7A51-4764-8409-A1B22B2F5147}");
const CLASS_NAME = "Flock Indexer";
const CONTRACT_ID = "@flock.com/indexer;1";
// RDF namespaces and the full predicate URIs built from them.
const FLOCK_NS = "http://flock.com/rdf#";
const NSCP_NS = "http://home.netscape.com/NC-rdf#";
const PROP_URL = NSCP_NS + "URL";
const PROP_NAME = NSCP_NS + "Name";
const PROP_CHILD = NSCP_NS + "child";
// Shorthand aliases for the XPCOM class, interface and result tables.
const Cc = Components.classes;
const Ci = Components.interfaces;
const Cr = Components.results;
/* from nspr's prio.h */
// File open flags passed to the stream init() calls in _initQueue/_saveQueue.
const PR_RDONLY = 0x01;
const PR_WRONLY = 0x02;
const PR_RDWR = 0x04;
const PR_CREATE_FILE = 0x08;
const PR_APPEND = 0x10;
const PR_TRUNCATE = 0x20;
const PR_SYNC = 0x40;
const PR_EXCL = 0x80;
// Instruction codes stored as the first element of every queue entry
// ([op, uri]) and dispatched on in _processQueue.
const OP_DELETE = 0;
const OP_ADD_FLOCK = 1;
const OP_ADD_HISTORY = 2;
/**
 * Constructor for the indexer component.
 *
 * Only subscribes the new instance to the "xpcom-shutdown" notification;
 * all remaining set-up is done by init().
 */
function flockIndexer() {
  var observerService = Cc["@mozilla.org/observer-service;1"]
                        .getService(Ci.nsIObserverService);
  observerService.addObserver(this, "xpcom-shutdown", false);
}
flockIndexer.prototype = {
// One-time set-up of the indexer: acquires the services it depends on,
// prepares the Lucene index directory, restores the persisted work queue,
// applies the current preferences and starts observing preference changes.
init: function INDEXER_init() {
// Both switches start off; observe() below turns them on according to prefs.
this._enabled = false;
this._enabledWebHistory = false;
this._logger = Cc["@flock.com/logger;1"].createInstance(Ci.flockILogger);
this._logger.init("indexer");
this._logger.info("starting up...");
this._profiler = Cc["@flock.com/profiler;1"].getService(Ci.flockIProfiler);
this._RDFS = Cc["@mozilla.org/rdf/rdf-service;1"]
.getService(Ci.nsIRDFService);
// Cache the RDF resources that the observers and queue handlers compare against.
this._resURL = this._RDFS.GetResource(PROP_URL);
this._resName = this._RDFS.GetResource(PROP_NAME);
this._resHistoryRoot = this._RDFS.GetResource("NC:HistoryRoot");
this._resHistoryChild = this._RDFS.GetResource(PROP_CHILD);
this._resIsIndexable = this._RDFS.GetResource(FLOCK_NS + "isIndexable");
// Same URI as PROP_NAME, i.e. the same predicate as _resName.
this._resTitle = this._RDFS.GetResource(NSCP_NS + "Name");
this._resDescription = this._RDFS.GetResource(NSCP_NS + "Description");
this._resTags = this._RDFS.GetResource(FLOCK_NS + "tags");
this._searchService = Cc["@flock.com/lucene/flockLucene;1"]
.getService(Ci.flockILucene);
this._favService = Cc["@mozilla.org/rdf/datasource;1?name=flock-favorites"]
.getService(Ci.flockIRDFObservable);
this._historyRdf = Cc["@mozilla.org/rdf/datasource;1?name=history"]
.getService(Ci.nsIRDFDataSource);
this._ios = Cc["@mozilla.org/network/io-service;1"]
.getService(Ci.nsIIOService);
// Used via get() and get_from_resource() to look up indexable objects.
this._coop = Cc["@flock.com/singleton;1"]
.getService(Ci.flockISingleton)
.getSingleton("chrome://flock/content/common/load-faves-coop.js")
.wrappedJSObject;
var dirService = Cc["@mozilla.org/file/directory_service;1"]
.getService(Ci.nsIProperties);
var profileDir = dirService.get("ProfD", Ci.nsIFile);
// Clean up old cardinal index dir
var oldHistorySearchDir = profileDir.clone();
oldHistorySearchDir.append("historysearch");
try {
oldHistorySearchDir.remove(true);
}
catch (ex) {
// Best effort: any failure to remove the old directory is ignored.
}
// initialize Lucene
var luceneDir = profileDir.clone();
luceneDir.append("lucene");
this._searchService.init(luceneDir);
// initialize the queue
this._initQueue();
// init queue processing timer
// NOTE(review): the timer is only armed by _addOp/_retireOp, so entries
// restored by _initQueue wait until the next _addOp call - confirm intended.
this._timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
// init page text store
// Filled by _handlePageLoad and consumed by _addHistoryDocument, keyed by URL.
this._pageText = {};
// create tokenizer
this._tokenizer = Cc["@flock.com/tokenizer;1"].createInstance(Ci.flockITokenizer);
// init with current prefs
// Reuses the pref-change handler so the start-up state matches the prefs.
this.observe(null, "nsPref:changed", null);
// watch for pref changes
var prefService = Cc["@mozilla.org/preferences-service;1"]
.getService(Ci.nsIPrefBranch2);
prefService.addObserver("flock.service.indexer.enabled", this, false);
prefService.addObserver("flock.service.indexer.indexWebHistory", this, false);
},
rebuildIndex: function INDEXER_rebuildIndex() {
this._logger.info("rebuilding index...");
// reindex history
var records = this._historyRdf.GetTargets(this._resHistoryRoot,
this._resHistoryChild,
true);
var record, url;
while (records.hasMoreElements()) {
record = records.getNext().QueryInterface(Ci.nsIRDFResource);
try {
url = this._ios.newURI(record.ValueUTF8, null, null);
} catch (e) {
url = null;
}
if (url && url.scheme == "http") {
this._addOp(OP_ADD_HISTORY, "history:" + url.spec);
}
}
},
_getQueueFile: function INDEXER__getQueueFile() {
var file = Cc["@mozilla.org/file/directory_service;1"]
.getService(Ci.nsIProperties).get("ProfD", Ci.nsILocalFile);
file.append("indexerQueue.js");
return file;
},
_initQueue: function INDEXER__initQueue() {
var queue;
try {
var file = this._getQueueFile();
var stream = Cc["@mozilla.org/network/file-input-stream;1"]
.createInstance(Ci.nsIFileInputStream);
stream.init(file, PR_RDONLY, 0, 0);
var cvstream = Cc["@mozilla.org/intl/converter-input-stream;1"]
.createInstance(Ci.nsIConverterInputStream);
cvstream.init(stream, "UTF-8", 1024,
Ci.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
var content = "";
var data = {};
while (cvstream.readString(4096, data)) {
content += data.value;
}
cvstream.close();
queue = content.replace(/\r\n?/g, "\n");
file.remove(false);
} catch (e) {
queue = null;
}
if (queue) {
try {
var s = new Components.utils.Sandbox("about:blank");
this._queue = Components.utils.evalInSandbox(queue, s);
this._logger.info("restored queue");
return;
} catch (e) {
this._logger.error("unable to restore queue");
}
}
// default to empty queue
this._queue = [];
},
_saveQueue: function INDEXER__saveQueue() {
// do nothing if queue is empty
if (this._queue.length == 0)
return;
try {
this._logger.info("saving queue...");
var file = this._getQueueFile();
var ostream = Cc["@mozilla.org/network/safe-file-output-stream;1"]
.createInstance(Ci.nsIFileOutputStream);
ostream.init(file, PR_WRONLY | PR_CREATE_FILE | PR_TRUNCATE, 0600, 0);
var converter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]
.createInstance(Ci.nsIScriptableUnicodeConverter);
converter.charset = "UTF-8";
var data = this._queue.toSource();
var convdata = converter.ConvertFromUnicode(data) + converter.Finish();
ostream.write(convdata, convdata.length);
if (ostream instanceof Ci.nsISafeOutputStream) {
ostream.finish();
} else {
ostream.close();
}
} catch (e) {
this._logger.error("unable to save queue");
}
},
_enable: function INDEXER__enable() {
if (!this._enabled) {
// start watching the favorites and history graphs
this._favService.addArcObserver(Ci.flockIRDFObserver.TYPE_ALL, null,
this._resIsIndexable, null, this);
this._favService.addArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
this._resTitle, null, this);
this._favService.addArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
this._resDescription, null, this);
this._favService.addArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
this._resTags, null, this);
this._historyRdf.AddObserver(this);
this._enabled = true;
this._enabledWebHistory = true;
this._logger.info("indexing enabled");
}
},
_disable: function INDEXER__disable() {
if (this._enabled) {
// stop watching the favorites and history graphs
this._favService.removeArcObserver(Ci.flockIRDFObserver.TYPE_ALL, null,
this._resIsIndexable, null, this);
this._favService.removeArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
this._resTitle, null, this);
this._favService.removeArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
this._resDescription, null, this);
this._favService.removeArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
this._resTags, null, this);
this._historyRdf.RemoveObserver(this);
this._enabled = false;
this._enabledWebHistory = false;
this._logger.info("indexing disabled");
}
},
_enableWebHistory: function INDEXER__enableWebHistory() {
if (this._enabled) {
if (!this._enabledWebHistory) {
this._historyRdf.AddObserver(this);
this._enabledWebHistory = true;
this._logger.info("web history indexing enabled");
}
}
},
_disableWebHistory: function INDEXER__disableWebHistory() {
if (this._enabled) {
if (this._enabledWebHistory) {
this._historyRdf.RemoveObserver(this);
this._enabledWebHistory = false;
this._logger.info("web history indexing disabled");
}
}
},
_shutdown: function INDEXER__shutdown() {
this._disable();
this._timer.cancel();
this._saveQueue();
this._logger.info("shut down");
},
_processQueue: function INDEXER__processQueue(sync) {
var batchSize = 1;
var c = 0;
while (this._queue.length > 0 && c < batchSize) {
c++;
var op = this._queue[0];
var instruction = op[0];
var uri = op[1];
switch (instruction) {
case OP_DELETE:
this._logger.info("removing " + uri);
if (sync) {
this._searchService.deleteDocumentSync(uri);
} else {
this._searchService.deleteDocument(uri, this);
}
break;
case OP_ADD_FLOCK:
var obj = this._coop.get(uri);
if (!obj) {
var msg = "trying to add nonexistent object: " + uri;
this._logger.error(msg);
throw Components.Exception(msg, Cr.NS_ERROR_UNEXPECTED);
}
var url = obj.URL;
if (!url) {
this._logger.warn("unable to get URL for " + uri);
url = "";
}
var type = obj.flockType;
var title = obj.name;
if (obj.tags) {
var tags = obj.tags;
} else {
var tags = "";
}
if (obj.description) {
var description = obj.description;
} else {
var description = "";
}
var text = "";
this._logger.info("indexing " + uri + " type: " + type + " title: " + title + " url: " + url);
if (sync) {
this._searchService.addDocumentSync(uri, type, url, title, tags, description, text);
} else {
this._searchService.addDocument(uri, type, url, title, tags, description, text, this);
}
break;
case OP_ADD_HISTORY:
this._logger.info("indexing " + uri + " type: history");
if (!sync) {
this._addHistoryDocument(uri);
}
break;
}
}
},
_addHistoryDocument: function INDEXER__addHistoryDocument(aURI) {
var uri = aURI;
var url = uri.substr("history:".length);
var title = "";
var tags = "";
var description = "";
var data = "";
var resource = this._historyRdf.GetSource(this._resURL,
this._RDFS.GetResource(url),
true);
if (resource) {
var title_node = this._historyRdf.GetTarget(resource, this._resName, true);
if (title_node && title_node.QueryInterface(Ci.nsIRDFLiteral)) {
title = title_node.Value;
this._logger.debug("got title");
}
}
if (url in this._pageText) {
data = this._pageText[url];
delete this._pageText[url];
}
this._searchService.addDocument(uri, "history", url, title, tags, description, data, this);
this._logger.debug("document indexed text: " + data);
},
_addOp: function INDEXER__addOp(aOp, aURI) {
this._queue.push([aOp, aURI]);
this._logger.info("queued operation op: " + aOp + " uri: " + aURI);
this._timer.initWithCallback(this, 250, Ci.nsITimer.TYPE_ONE_SHOT);
},
_retireOp: function INDEXER__retireOp() {
this._queue.shift();
if (this._queue.length > 0) {
this._timer.initWithCallback(this, 0, Ci.nsITimer.TYPE_ONE_SHOT);
}
},
_handlePageLoad: function INDEXER_handlePageLoad(document) {
var profilerEvt = this._profiler.profileEventStart("indexer-tokenize");
var url = document.documentURI;
this._logger.debug("_handlePageLoad: " + url);
if (document.body) {
this._pageText[url] = this._tokenizer.tokenizeDOMNode(document,
document.body);
}
this._addOp(OP_ADD_HISTORY, "history:" + url);
this._profiler.profileEventEnd(profilerEvt, url);
},
// nsIDOMEventListener
handleEvent: function INDEXER_handleEvent(aEvent) {
switch(aEvent.type) {
case "load":
var url;
try {
url = this._ios.newURI(aEvent.originalTarget.documentURI, null, null);
} catch (e) { }
if (url && url.scheme == "http") {
this._handlePageLoad(aEvent.originalTarget);
}
break;
}
},
// flockIMigratable
get migrationName() { return "Indexer"; },
// flockIMigratable
needsMigration: function INDEXER_needsMigration(oldVersion) {
return oldVersion.substr(0, 3) == "0.7";
},
// flockIMigratable
// No migration context is needed by this component; always returns null.
startMigration: function INDEXER_startMigration(oldVersion, listener) {
return null;
},
// flockIMigratable
// Intentionally empty: nothing is done when migration finishes.
finishMigration: function INDEXER_finishMigration(ctxtWrapper) {
},
// flockIMigratable
// Performs the whole migration in one call by queueing a rebuild of the
// history index, then returns false.
// NOTE(review): false presumably signals "no more work" to the migration
// driver - confirm against flockIMigratable.
doMigrationWork: function INDEXER_doMigrationWork(ctxtWrapper) {
this.rebuildIndex();
return false;
},
// flockILuceneListener
// Completion callback for addDocument(): logs the URI and retires the queue
// entry at the head of the queue, which schedules the next one if any.
onAddDocumentComplete: function INDEXER_onAddDocumentComplete(aURI) {
this._logger.debug("onAddDocumentComplete: " + aURI);
this._retireOp();
},
// flockILuceneListener
// Completion callback for deleteDocument(): logs the URI and retires the
// queue entry at the head of the queue, which schedules the next one if any.
onDeleteDocumentComplete: function INDEXER_onDeleteDocumentComplete(aURI) {
this._logger.debug("onDeleteDocumentComplete: " + aURI);
this._retireOp();
},
// nsITimerCallback
// Fired by the timer armed in _addOp/_retireOp; processes the queue using
// the asynchronous search-service calls.
notify: function INDEXER_notify(timer) {
this._processQueue(false);
},
// flockIRDFObserver
rdfChanged: function INDEXER__rdfChanged(ds, type, rsrc, pred, obj, oldObj) {
if (pred == this._resIsIndexable) {
switch (type) {
case Ci.flockIRDFObserver.TYPE_ASSERT:
var indexable = this._coop.get_from_resource(rsrc);
if (indexable.isIndexable) {
this._addOp(OP_ADD_FLOCK, indexable.id());
}
break;
case Ci.flockIRDFObserver.TYPE_CHANGE:
var indexable = this._coop.get_from_resource(rsrc);
var op = indexable.isIndexable ? OP_ADD_FLOCK : OP_DELETE;
this._addOp(op, indexable.id());
break;
case Ci.flockIRDFObserver.TYPE_UNASSERT:
rsrc.QueryInterface(Ci.nsIRDFResource);
this._addOp(OP_DELETE, rsrc.ValueUTF8);
break;
}
} else {
var indexable = this._coop.get_from_resource(rsrc);
if (indexable && indexable.isIndexable) {
this._addOp(OP_ADD_FLOCK, indexable.id());
}
}
},
// nsIRDFObserver
onAssert: function INDEXER_onAssert(ds, source, predicate, target) {
if (predicate.ValueUTF8 == PROP_NAME &&
ds.HasAssertion(this._resHistoryRoot, this._resHistoryChild,
source, true)) {
var url = null;
try {
url = this._ios.newURI(source.ValueUTF8, null, null);
}
catch (e) { }
if (url && url.scheme == "http") {
this._addOp(OP_ADD_HISTORY, "history:" + url.spec);
}
}
},
// nsIRDFObserver
onUnassert: function INDEXER_onUnassert(ds, source, predicate, target) {
if (source.ValueUTF8 == "NC:HistoryRoot" &&
predicate.ValueUTF8 == PROP_CHILD) {
target.QueryInterface(Ci.nsIRDFResource);
var url = null;
try {
url = this._ios.newURI(target.ValueUTF8, null, null);
} catch (e) {}
if (url && url.scheme == "http") {
this._addOp(OP_DELETE, "history:" + url.spec);
}
}
},
// nsIRDFObserver
onChange: function INDEXER_onChange(ds, source, predicate, oldTarget, newTarget) {
if (predicate.ValueUTF8 == PROP_NAME &&
ds.HasAssertion(this._resHistoryRoot, this._resHistoryChild,
source, true)) {
var url = null;
try {
url = this._ios.newURI(source.ValueUTF8, null, null);
} catch (e) {}
if (url && url.scheme == "http") {
this._addOp(OP_ADD_HISTORY, "history:" + url.spec);
}
}
},
// nsIRDFObserver
// Intentionally empty: moves in the history graph are not acted upon.
onMove: function INDEXER_onMove(ds, oldSource, newSource, predicate, target) {
},
// nsIRDFObserver
// Intentionally empty: the start of an update batch is not acted upon.
onBeginUpdateBatch: function INDEXER_onBeginUpdateBatch(ds) {
},
// nsIRDFObserver
// Intentionally empty: the end of an update batch is not acted upon.
onEndUpdateBatch: function INDEXER_onEndUpdateBatch(ds) {
},
// nsIObserver
observe: function INDEXER_observe(subject, topic, state) {
switch (topic) {
case "xpcom-shutdown":
var obs = Cc["@mozilla.org/observer-service;1"]
.getService(Ci.nsIObserverService);
obs.removeObserver(this, "xpcom-shutdown");
this._shutdown();
return;
case "nsPref:changed":
var prefService = Cc["@mozilla.org/preferences-service;1"]
.getService(Ci.nsIPrefBranch);
if (prefService.getPrefType("flock.service.indexer.enabled")) {
if (prefService.getBoolPref("flock.service.indexer.enabled")) {
this._enable();
} else {
this._disable();
}
} else {
this._enable();
}
if (prefService.getPrefType("flock.service.indexer.indexWebHistory")) {
if (prefService.getBoolPref("flock.service.indexer.indexWebHistory")) {
this._enableWebHistory();
} else {
this._disableWebHistory();
}
} else {
this._enableWebHistory();
}
break;
}
},
// nsIClassInfo
getInterfaces: function INDEXER_getInterfaces(aCount) {
var interfaces = [Ci.flockIIndexer, Ci.nsIClassInfo, Ci.nsIObserver,
Ci.flockIRDFObserver, Ci.nsIRDFObserver,
Ci.nsITimerCallback, Ci.flockILuceneListener,
Ci.flockIMigratable, Ci.nsIDOMEventListener];
aCount.value = interfaces.length;
return interfaces;
},
// nsIClassInfo
// No language helper is provided; always returns null.
getHelperForLanguage: function INDEXER_getHelperForLanguage(aLanguage) {
return null;
},
// nsIClassInfo
// Same contract ID that Module registers the factory under.
contractID: CONTRACT_ID,
// nsIClassInfo
// Human-readable class name, also used as the category entry name.
classDescription: CLASS_NAME,
// nsIClassInfo
// Same class ID that Module registers the factory under.
classID: CLASS_ID,
// nsIClassInfo
implementationLanguage: Ci.nsIProgrammingLanguage.JAVASCRIPT,
// nsIClassInfo
// Declares the component as a singleton.
flags: Ci.nsIClassInfo.SINGLETON,
// nsISupports
QueryInterface: function INDEXER_QueryInterface(aIID) {
if (!aIID.equals(Ci.nsISupports) &&
!aIID.equals(Ci.flockIIndexer) &&
!aIID.equals(Ci.nsIClassInfo) &&
!aIID.equals(Ci.nsIObserver) &&
!aIID.equals(Ci.flockIRDFObserver) &&
!aIID.equals(Ci.nsIRDFObserver) &&
!aIID.equals(Ci.nsITimerCallback) &&
!aIID.equals(Ci.flockILuceneListener) &&
!aIID.equals(Ci.flockIMigratable) &&
!aIID.equals(Ci.nsIDOMEventListener))
throw Cr.NS_ERROR_NO_INTERFACE;
return this;
}
};
/******************************************************************************
* XPCOM Functions for construction and registration
******************************************************************************/
// nsIModule implementation used to register and unregister the component.
var Module = {
  // The first registerSelf call is refused on purpose; see registerSelf.
  _firstTime: true,

  // Registers the factory for CLASS_ID / CONTRACT_ID and adds the component
  // to the "flockMigratable" category as a persistent entry.
  // The very first call throws NS_ERROR_FACTORY_REGISTER_AGAIN and registers
  // nothing; registration is done on the following call.
  registerSelf: function(aCompMgr, aFileSpec, aLocation, aType) {
    if (this._firstTime) {
      this._firstTime = false;
      throw Cr.NS_ERROR_FACTORY_REGISTER_AGAIN;
    }
    aCompMgr = aCompMgr.QueryInterface(Ci.nsIComponentRegistrar);
    aCompMgr.registerFactoryLocation(CLASS_ID, CLASS_NAME, CONTRACT_ID, aFileSpec, aLocation, aType);
    var catman = Cc["@mozilla.org/categorymanager;1"].getService(Ci.nsICategoryManager);
    catman.addCategoryEntry("flockMigratable", CLASS_NAME, CONTRACT_ID, true, true);
  },

  // Undoes registerSelf: unregisters the factory and removes the persistent
  // "flockMigratable" category entry, so no entry is left pointing at an
  // unregistered contract ID.
  unregisterSelf: function(aCompMgr, aLocation, aType) {
    aCompMgr = aCompMgr.QueryInterface(Ci.nsIComponentRegistrar);
    aCompMgr.unregisterFactoryLocation(CLASS_ID, aLocation);
    var catman = Cc["@mozilla.org/categorymanager;1"].getService(Ci.nsICategoryManager);
    catman.deleteCategoryEntry("flockMigratable", CLASS_NAME, true);
  },

  // Returns the Factory when an nsIFactory is requested for CLASS_ID.
  // Throws NS_ERROR_NOT_IMPLEMENTED for any other interface and
  // NS_ERROR_NO_INTERFACE for any other class ID.
  getClassObject: function(aCompMgr, aCID, aIID) {
    if (!aIID.equals(Ci.nsIFactory))
      throw Cr.NS_ERROR_NOT_IMPLEMENTED;
    if (aCID.equals(CLASS_ID))
      return Factory;
    throw Cr.NS_ERROR_NO_INTERFACE;
  },

  // The module may always be unloaded.
  canUnload: function(aCompMgr) { return true; }
};
// nsIFactory implementation handed out by Module.getClassObject.
var Factory = {
  // Creates a new flockIndexer and returns it queried to aIID.
  // Aggregation is not supported: a non-null aOuter raises
  // NS_ERROR_NO_AGGREGATION.
  createInstance: function(aOuter, aIID) {
    if (aOuter === null || aOuter === undefined) {
      var instance = new flockIndexer();
      return instance.QueryInterface(aIID);
    }
    throw Cr.NS_ERROR_NO_AGGREGATION;
  }
};
// Component entry point: hands the Module object to the component loader.
function NSGetModule(aCompMgr, aFileSpec) {
  return Module;
}